#Alexander F. Gazmararian
#afg2@princeton.edu

# Load packages
library(tidyverse)
library(mapproj)

# Load custom functions
source("code/fun/savefig.r")

# Load CBP data
temp <- tempfile()
download.file("https://www2.census.gov/programs-surveys/cbp/datasets/2020/cbp20co.zip", temp)
cbp <- read.csv(unz(temp, "cbp20co.txt"), header = TRUE)

# Subset to highest industry level
cbp_all <-
  cbp %>%
  dplyr::filter(naics == "------") %>%
  dplyr::group_by(fipstate, fipscty) %>%
  dplyr::summarise(emp = sum(emp),
                   ap = sum(ap))

# Create fossil fuel industry subset
cbp_ff <-
  cbp %>%
  dplyr::mutate(
    industry = dplyr::case_when(
      naics %in% c(
        ##Oil and gas
        "213111", #drilling oil and gas wells
        "213112", #support activities for oil and gas operations
        "211120", #crude petroleum extraction
        "211130", #natural gas extraction
        "237120", #oil and gas pipeline and related structures construction
        "486110", #pipeline transportation of crude oil 
        "486210", #pipeline transportation of natural gas
        "486990", #All Other Pipeline Transportation
        "486910", #pipeline transportation of refined petroleum products
        "424710", #petroleum bulk stations and terminals
        "424720", #petroleum and petroleum products merchant wholesalers (except bulk stations and terminals)
        "324110", #petroleum refineries
        "324121", #asphalt paving mixture and block manufacturing
        "324122", #asphalt shingle and coating materials manufacturing
        "324191", #petroleum lubricating oil and grease manufacturing
        "324199", #all other petroleum and coal product manufacturing
        "221210", #natural gas distribution
        "333131", #Mining Machinery and Equipment Manufacturing
        "333132", #Oil and Gas Field Machinery and Equipment Manufacturing
        "332420", #Metal Tank (Heavy Gauge) Manufacturing
        "454310", #Fuel Dealers
        ##chemicals
        "325110", #petrochemical manufacturing
        "325120", #Industrial Gas Manufacturing
        "325130", #Synthetic Dye and Pigment Manufacturing
        "325180", #Other Basic Inorganic Chemical Manufacturing
        "325193", #Ethyl Alcohol Manufacturing
        "325194", #Cyclic Crude, Intermediate, and Gum and Wood Chemical Manufacturing
        "325199", #All Other Basic Organic Chemical Manufacturing
        ##coal
        "212114", #surface coal mining
        "212115", #underground coal mining
        "213113", #support activities for coal mining
        "212111", #Bituminous Coal and Lignite Surface Mining
        "212112", #Bituminous Coal Underground Mining
        "212113", #Anthracite Mining
        #power generation
        "221112", #fossil fuel electric power generation
        #internal combustion engines
        "336310", #Motor Vehicle Gasoline Engine and Engine Parts Manufacturing
        "336350", #Motor Vehicle Transmission and Power Train Parts Manufacturing
        "336320", #Motor Vehicle Electrical and Electronic Equipment Manufacturing
        "336390", #Other Motor Vehicle Parts Manufacturing
        #gas stations
        "447110", #Gasoline Stations with Convenience Stores,
        "447190", #Other Gasoline Stations
        "811191", #Automotive Oil Change and Lubrication Shops
        "811111", #General Automotive Repair
        "811114" #Specialized Automotive Repair
      ) ~ "ff",
      !grepl("-", naics) & !grepl("/", naics) ~ "other"
    )
  ) %>%
  dplyr::filter(industry == "ff") %>%
  dplyr::group_by(fipstate, fipscty) %>%
  dplyr::summarise(ff = sum(emp),
                   ff_ap = sum(ap))

# Merge data
df <- merge(cbp_all, cbp_ff, by = c("fipstate", "fipscty"), all = TRUE)

# Fix fips codes
agg <-
  df %>%
  dplyr::mutate(
    fips = dplyr::case_when(
      nchar(fipscty) == 1 ~ paste0(fipstate, "00", fipscty),
      nchar(fipscty) == 2 ~ paste0(fipstate, "0", fipscty),
      nchar(fipscty) == 3 ~ paste0(fipstate, fipscty)
    ),
    fips = as.numeric(fips)
  )
agg <- subset(agg, select = -c(fipstate, fipscty))
agg$ff <- ifelse(is.na(agg$ff), 0, agg$ff)
agg$ffshare <- with(agg, ff / emp)
agg$ff_ap <- ifelse(is.na(agg$ff_ap), 0, agg$ff_ap)
agg$ffpay_share <- with(agg, ff_ap/ ap)

# Aggregate
agg <-
  agg %>%
  dplyr::mutate(
    ff_cat = dplyr::case_when(
      ffshare <.0078 ~ "<0.78%",
      ffshare>=.0078 & ffshare<.025 ~ "0.78-2.5%",
      ffshare>=0.025 & ffshare<0.05 ~ "2.5-5.0%",
      ffshare>=0.05 & ffshare<.1 ~ "5.0-10.0%",
      ffshare>=.1 ~ "\u2265 10.0%"
    ),
    ff_cat = factor(
      ff_cat,
      ordered = TRUE,
      levels = c(
        "<0.78%", "0.78-2.5%", "2.5-5.0%", "5.0-10.0%", "\u2265 10.0%"
      )
    ),
    ff_ap_cat = dplyr::case_when(
      ffpay_share <.01 ~ "<1%",
      ffpay_share>=.01 & ffpay_share<.025 ~ "1-2.5%",
      ffpay_share>=0.025 & ffpay_share<0.05 ~ "2.5-5%",
      ffpay_share>=0.05 & ffpay_share<.1 ~ "5-10%",
      ffpay_share>=.1 & ffpay_share <.15 ~ "10-15%",
      ffpay_share>=.15 & ffpay_share < 0.25~ "15-25%",
      ffpay_share>=.25 ~ "≥25%"
    ),
    ff_ap_cat = factor(
      ff_ap_cat,
      ordered = TRUE,
      levels = c(
        "<1%", "1-2.5%", "2.5-5%", "5-10%", "10-15%", "15-25%", "≥25%"
      )
    )
  )
fossil_share <- subset(agg, !is.na(fips))

# Download fip codes
fips <- read.csv("https://raw.githubusercontent.com/kjhealy/fips-codes/master/county_fips_master.csv")
fips <- subset(fips, select = c(fips, county_name, state_name))
#merge with fips
fossil_share <- merge(fossil_share, fips, by = "fips", all.x = TRUE)
fossil_share <- subset(fossil_share, !is.na(county_name))
#update county names
fossil_share$county_name[1803] <- "dona ana County"
fossil_share$county_name <- gsub(" County", "", fossil_share$county_name)
fossil_share$county_name <- gsub(" Parish", "", fossil_share$county_name)
fossil_share$county_name <- tolower(fossil_share$county_name)
fossil_share$state_name <- tolower(fossil_share$state_name)
fossil_share <- dplyr::rename(fossil_share, county = county_name, state = state_name)
#fix names
fossil_share$county <- gsub("\\.", "", fossil_share$county)
fossil_share$county <- gsub("saint", "st", fossil_share$county)
fossil_share$county <- gsub("'", "", fossil_share$county)
fossil_share$county <- stringi::stri_trans_general(str = fossil_share$county, id = "Latin-ASCII")
fossil_share$county <- gsub("de ", "de", fossil_share$county)
fossil_share$county <- gsub("la ", "la", fossil_share$county)
fossil_share$county <- gsub("le ", "le", fossil_share$county)
fossil_share$county <- gsub("du ", "du", fossil_share$county)
fossil_share[fossil_share$county == "district of columbia", ]$county <- "washington"
fossil_share[fossil_share$fips == 35013, ]$county <- "dona ana"
fossil_share$county <- gsub(" city", "", fossil_share$county)
fossil_share[fossil_share$county == "oglalalakota", ]$county <- "oglaladakota"

# Load map data
counties <- map_data("county")
counties$subregion <- gsub("de ", "de", counties$subregion)
counties$subregion <- gsub("la ", "la", counties$subregion)
counties$subregion <- gsub("le ", "le", counties$subregion)
counties$subregion <- gsub("du ", "du", counties$subregion)
counties$subregion <- gsub(" city", "", counties$subregion)
#create median indicators
fossil_share$ff_abovemed <- with(fossil_share, ifelse(ffshare > median(ffshare, na.rm = TRUE), 1, 0))
fossil_share$ff_aboveavg <- with(fossil_share, ifelse(ffshare > mean(ffshare, na.rm = TRUE), 1, 0))

# Prep for map
fossil_map <-
  fossil_share %>%
  dplyr::left_join(counties, ., by = c("subregion" = "county", "region" = "state"))
mainstates <- ggplot2::map_data("state")

# Create map
pay.plot <-
  fossil_map %>%
  ggplot() +
  geom_polygon(aes(x = long, y = lat, group = group, fill = ffpay_share), color = "grey45", size = .1) +
  geom_polygon(data = mainstates, aes(x = long, y = lat, group = group), fill="transparent", color = "black", size = .25) +
  scale_fill_gradientn(colors = grey.colors(100, start = 0, end = 1, rev = TRUE), limits = c(0,.25), oob = scales::squish, labels = c("0%", "5%", "10%", "15%", "20%", "≥25%")) +
  coord_map(projection = "lambert", lat = 52, lon = 10) +
  theme_minimal() +
  scale_x_continuous(expand = c(0,0)) +
  scale_y_continuous(expand = c(0,0)) +
  labs(fill = "", y = "", x = "") +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    plot.margin = margin(t=0,l=-45,b=0,r=0),
    legend.position = "right",
    legend.key.width = unit(.5, "cm"),
    legend.box.margin = margin(l=-60, t=15)
  )
pay.plot
savefig(pay.plot, "1.1_figure_ffpaymap", filepath = "figures/", height = 2.7, extension = ".jpeg", device = "jpeg")
